{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Removing outliers - outlier trimming"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import fetch_california_housing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from feature_engine.outliers import OutlierTrimmer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MedInc</th>\n",
       "      <th>HouseAge</th>\n",
       "      <th>AveRooms</th>\n",
       "      <th>AveBedrms</th>\n",
       "      <th>Population</th>\n",
       "      <th>AveOccup</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.3252</td>\n",
       "      <td>41.0</td>\n",
       "      <td>6.984127</td>\n",
       "      <td>1.023810</td>\n",
       "      <td>322.0</td>\n",
       "      <td>2.555556</td>\n",
       "      <td>37.88</td>\n",
       "      <td>-122.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8.3014</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.238137</td>\n",
       "      <td>0.971880</td>\n",
       "      <td>2401.0</td>\n",
       "      <td>2.109842</td>\n",
       "      <td>37.86</td>\n",
       "      <td>-122.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.2574</td>\n",
       "      <td>52.0</td>\n",
       "      <td>8.288136</td>\n",
       "      <td>1.073446</td>\n",
       "      <td>496.0</td>\n",
       "      <td>2.802260</td>\n",
       "      <td>37.85</td>\n",
       "      <td>-122.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5.6431</td>\n",
       "      <td>52.0</td>\n",
       "      <td>5.817352</td>\n",
       "      <td>1.073059</td>\n",
       "      <td>558.0</td>\n",
       "      <td>2.547945</td>\n",
       "      <td>37.85</td>\n",
       "      <td>-122.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.8462</td>\n",
       "      <td>52.0</td>\n",
       "      <td>6.281853</td>\n",
       "      <td>1.081081</td>\n",
       "      <td>565.0</td>\n",
       "      <td>2.181467</td>\n",
       "      <td>37.85</td>\n",
       "      <td>-122.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \\\n",
       "0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   \n",
       "1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   \n",
       "2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   \n",
       "3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   \n",
       "4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   \n",
       "\n",
       "   Longitude  \n",
       "0    -122.23  \n",
       "1    -122.22  \n",
       "2    -122.24  \n",
       "3    -122.25  \n",
       "4    -122.25  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load the California House price data from Scikit-learn\n",
    "X, y = fetch_california_housing(return_X_y=True, as_frame=True)\n",
    "\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's separate the data into training and testing sets\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_limits(df, variable, fold):\n",
    "    \"\"\"Return the IQR proximity-rule limits for one column.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : object indexable by `variable`, whose result exposes `.quantile`\n",
    "        (used in this notebook with a pandas DataFrame).\n",
    "    variable : column label to look up in `df`.\n",
    "    fold : multiplier applied to the inter-quartile range.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (lower_limit, upper_limit) : the 25th percentile minus `fold` times the\n",
    "        IQR, and the 75th percentile plus `fold` times the IQR.\n",
    "    \"\"\"\n",
    "    column = df[variable]\n",
    "    first_quartile = column.quantile(0.25)\n",
    "    third_quartile = column.quantile(0.75)\n",
    "\n",
    "    # distance allowed beyond each quartile\n",
    "    margin = (third_quartile - first_quartile) * fold\n",
    "\n",
    "    return first_quartile - margin, third_quartile + margin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-3.925900000000002, 11.232600000000001)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we find the limits\n",
    "\n",
    "lower_limit, upper_limit = find_limits(X_train, \"MedInc\", 3)\n",
    "lower_limit, upper_limit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove outliers on the left: keep only the rows whose MedInc is\n",
    "# greater than or equal to the lower limit found on the training set\n",
    "\n",
    "inliers = X_train[\"MedInc\"] >= lower_limit\n",
    "train_t = X_train[inliers]\n",
    "\n",
    "inliers = X_test[\"MedInc\"] >= lower_limit\n",
    "test_t = X_test[inliers]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove outliers on the right: keep only the rows whose MedInc is\n",
    "# less than or equal to the upper limit found on the training set.\n",
    "# We filter train_t / test_t (not X_train / X_test) so that any trimming\n",
    "# already applied to those frames is preserved instead of being discarded.\n",
    "\n",
    "inliers = train_t[\"MedInc\"].le(upper_limit)\n",
    "train_t = train_t.loc[inliers]\n",
    "\n",
    "inliers = test_t[\"MedInc\"].le(upper_limit)\n",
    "test_t = test_t.loc[inliers]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((14448, 8), (6192, 8))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((14348, 8), (6151, 8))"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_t.shape, test_t.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature-engine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-1 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-1 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-1 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-1 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>OutlierTrimmer(capping_method=&#x27;iqr&#x27;, fold=1.5, tail=&#x27;both&#x27;,\n",
       "               variables=[&#x27;MedInc&#x27;, &#x27;HouseAge&#x27;, &#x27;Population&#x27;])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;OutlierTrimmer<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>OutlierTrimmer(capping_method=&#x27;iqr&#x27;, fold=1.5, tail=&#x27;both&#x27;,\n",
       "               variables=[&#x27;MedInc&#x27;, &#x27;HouseAge&#x27;, &#x27;Population&#x27;])</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "OutlierTrimmer(capping_method='iqr', fold=1.5, tail='both',\n",
       "               variables=['MedInc', 'HouseAge', 'Population'])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trimmer = OutlierTrimmer(\n",
    "    variables=[\"MedInc\", \"HouseAge\", \"Population\"],\n",
    "    capping_method=\"iqr\",\n",
    "    tail=\"both\",\n",
    "    fold=1.5,\n",
    ")\n",
    "\n",
    "trimmer.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'MedInc': -0.6776500000000012, 'HouseAge': -10.5, 'Population': -626.0}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trimmer.left_tail_caps_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'MedInc': 7.984350000000001, 'HouseAge': 65.5, 'Population': 3134.0}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trimmer.right_tail_caps_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_t = trimmer.transform(X_train)\n",
    "test_t = trimmer.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((14448, 8), (6192, 8))"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((13165, 8), (5619, 8))"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_t.shape, test_t.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "def plot_boxplot_and_hist(data, variable):\n",
    "    \"\"\"Show a boxplot stacked above a histogram of one column.\n",
    "\n",
    "    The two panels share the x-axis. `data[variable]` feeds the boxplot,\n",
    "    and `variable` is used as the x column of `data` for the histogram.\n",
    "    The figure is displayed with `plt.show()`; nothing is returned.\n",
    "    \"\"\"\n",
    "    panel_heights = {\"height_ratios\": (0.50, 0.85)}\n",
    "    _, axes = plt.subplots(nrows=2, sharex=True, gridspec_kw=panel_heights)\n",
    "    box_axis, hist_axis = axes\n",
    "\n",
    "    sns.boxplot(x=data[variable], ax=box_axis)\n",
    "    sns.histplot(data=data, x=variable, ax=hist_axis)\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbYUlEQVR4nO3dfZRU9Z3n8feH7kYaoeUZDdK2G5UxSCIueVAncxiJrnGy4myyGZksMRNddMdkyMOEPGdm4ozHkJxEMxMlDBoN8eC6Rk+Im0lUDImbjA+gUYJMRxIVMSACIkRB6e7v/lG3LtXV1d1FU9W3qvvzOqdP3+f6NnTXp+7v/u7vKiIwMzMDGJF1AWZmVjscCmZmlnIomJlZyqFgZmYph4KZmaUasy7gSEyaNCna2tqyLsPMrK6sX79+Z0RMLrWurkOhra2NdevWZV2GmVldkfRsb+vcfGRmZimHgpmZpRwKZmaWciiYmVnKoWBmZimHgpmZpeq6S+pw8ufvfR8v7dqZdRmHZfzESdz1/TuyLsPMDoNDoU68tGsn+9764Yodb+wjN1X0eCU9clN1j29mFefmIzMzSzkUzMws5VAwM7OUQ8HMzFIOBTMzSzkUzMws5VAwM7OUQ8HMzFIOBTMzSzkUzMws5VAwM7OUQ8HMzFLDNhTmzp2bdQk2zPh3zurBsA0FMzPryaFgZmYph4KZmaUcCmZmlnIomJlZyqFgZmYph4KZmaUcCmZmlnIomJlZyqFgZmYph4KZmaUasy7AbDjIj3vk8Y9qx6hRozhw4MCA9m1oaKCzs5OmpiYOHjxIU1MTHR0dTJo0iQMHDrBv3750u66uLk444QQWLlzIvHnzAFizZg0rV65ky5YttLa2dlvXm/w+zzzzTI91a9euHdDPUYrPFMyqzEFQHQ0NDTQ2HvpcK4mGhoZu24wfP77b/MSJExk1ahRAGggtLS0ljy+JkSNHdls2c+ZMRowYwYgRubfOGTNm0NDQwNy5c2lpaWHv3r288sornH322UyYMIHRo0fT0tLCO9/5TlasWMGaNWtYs2YNK1asYPHixdxzzz0sXrw4Xdeb/D6FgdDc3JxOV/J3zKFgZj3e/LIgKZ3Ov+GNHTs2XVb4Ztzc3ExnZycdHR3d1hcf5w9/+EO6fOTIkezatYtx48al20hi7969nHXWWd0CpbGxkYhgwoQJ6f4Av/nNb7jiiivo6upiwoQJtLe3c/nll/PUU08xatQoOjs7ueKKK3j44Yf54he/yJe//GVGjRrFAw88wJIlS1i5ciUrV65kyZIlzJ49m8bGRmbPnp2u601+n7y1a9dy9dVX09bWVv4/cJnqrvlI0iJgEUBra+sRHcuf4KrP/8b14fXXX8+6BCIinX7ttdeA3Jt6XldXVzpdqtmns7Ozx7KDBw/2mN6xY0f6WvnvS5Ys4aKLLkq3zYfNjh07ur3uwYMHufDCC7n++uvZs2cPXV1dXHjhhSxbtoyIICLS9bNmzUqPIYlZs2axZcsWgHRdXuG6UrZs2ZLus2DBgm77LFiwgFWrVvW67+Gqu1CIiOXAcoA5c+ZEP5v3qZLtcNVWr2+u9fRvXC318H83cuTIzINBUvomfdRRR7F//37GjBmTts/nP7F3dXUxatQo9u/f323//Cf9rq6u9DhNTU10dnbS1dVFU1MTr7/+OlOmTOGFF14gItLXXLp0aXqdAHJnCh0dHUyZMqVbMDQ1NbF69WoaGho45phj2LdvH6tXr6a1tZX9+/eza9cuVq9eTVNTExs2bABgypQpNDc3s2HDhvSD7IYNG5g9e3Zae+G6UlpbW9PjrVq1issvvzzdp5KBAHUYCmZWeVkHAnQ/U8i/4ecDAbqfKezfv5+GhgYkpZ/qu7q6GDFiRLfjjBkzhpdeegnI/YwTJ05kz5493c4UWlpa+OUvf9mtlo6ODiSxe/fubq97yimnsGzZMhoaGti9ezennXYa3/72tznn
nHN4+OGHaWhoYNmyZZx55plcddVVHDx4kBEjRnDeeeexdOlSLrvsMgCWLl3KkiVLmDVrFhs2bOi2rpSFCxeydOnSdH7u3Lk0Nzf3CMZKcCiYVdnatWvr4myh3hQ3F0VEj2X5QMjbtWtXOp3vfbR3796Sx4+IHmG5ceNG4NB1i/b2djo7O1m7dm233ke/+MUvgEO9jx544AEuu+yybj2MrrvuurT3UfG6Yvl1hb2PCgOhkmfkDgWzQZAPBjenGeTe5PvrglqJfQbCvY/MzCzlUDAzs5RDwczMUg4FMzNLORTMzCzlUDAzs5RDwczMUg4FMzNLORTMzCzlUDAzs5RDwczMUsM2FDwGjQ02/85ZPRi2oWBmZj05FMzMLOVQMDOzlEPBzMxSDgUzM0s5FMzMLOVQMDOzlEPBzMxSDgUzM0s5FMzMLOVQMDOzlEPBzMxSjVkXYOUZP3ESPHJTRY85tsLHKzZ+4qSqHt/MKs+hUCfu+v4dWZdgZsOAm4/MzCzlUDAzs5RDwczMUg4FMzNLORTMzCzlUDAzs5QiIusaBkzSi8CzRYsnATszKOdwuc7Kcp2V5Torq9bqPCEiJpdaUdehUIqkdRExJ+s6+uM6K8t1VpbrrKx6qRPcfGRmZgUcCmZmlhqKobA86wLK5Dorq+bqlBSSTipaXHN19sJ1Vla91Dn0rimYDYSkZ4A3AG+IiJ0Fyx8DTgdOjIhnDvOYAZwcEZsl3QxsjYgvVKpms2oYimcKZgP1NLAgPyNpFjA6u3LMBp9DweyQlcAHC+YvAb6bn5F0lKSvSdoi6QVJyyQ1F6z/lKRtkn4v6cO9vYiktqRp6ZLkWDslfb5gfYOkz0n6raR9ktZLml7hn9WsJIeC2SEPAi2STpXUAFwMfK9g/TXAKeSak04CpgFfApB0PvC3wLnAycC7yni9PwZmAPOAL0k6NVn+CXJnLBcALcCHgVeP5AczK5dDway7/NnCucAm4PlkuYBFwMcjYndE7AOuJhccAO8HvhMRv46IV4C/L+O1/iEi9kfE48DjwFuS5ZcBX4iI9sh5PCJ2VeKHM+uPH7Jj1t1K4OfAiRQ0HQGTyV1fWC8pv0xAQzL9BmB9wfbFd9qXsr1g+lVgTDI9HfjtYVVtViE+UzArEBHPkrvgfAFwZ8GqncB+YGZEjEu+jomI/Bv5NnJv5nmtR1DGc8Abj2B/swFzKJj1dClwTtIMlNcF/CvwDUlTACRNk/RfkvW3Ax+S9CZJo4G/O4LXXwFcJelk5bxZ0sQjOJ5Z2RwKZkUi4rcRsa7Eqk8Dm4EHJe0F7iN3oZiI+DfgWuD+ZJv7j6CEr5MLmXuAvcCNQHOfe5hViG9eMzOzlM8UzMws5VAwM7OUQ8HMzFIOBTMzS9X1zWuTJk2Ktra2rMswM6sr69ev39nb4zjrOhTa2tpYt65Uz0EzM+uNpF7vuHfzkZmZpRwKZmaWciiYmVmqrq8pWM7BgwfZuHFjt2UzZ86kqakpo4rMrF45FIaAjRs38tff+iFjj80NzLlv+xauvxJOP/30bAszs7rjUBgixh7byvjpp2RdhpnVOV9TMDOzlEPBzMxSDgUzM0v5msIQ1NXZQXt7e7dl+d5I7qlkZn1xKAxBf3jxeb5y92tM/vUBoHtvJPdUMrO+OBSGqKOnTO+1N5J7KplZbxwKdcLNPmY2GBwKdcLNPmY2GBwKdcTNPmZWbe6SamZmKYeCmZmlHApmZpbyNYUaVdzbqL29nYjIsCIzGw4cCjWquLfR9o0P0XLimzOuysyGOodCDSvsbbR3e6/P2TYzqxiHQoaKm4h8M5qZZc2hkKHCJqJavRnNwWU2vDgUMlbrN6TVQ3CZWeVUtUuqpHGS7pD0H5I2STpT0gRJ90p6Kvk+PtlWkr4pabOkJySdUc3arHz54Mpf9Dazoava9ylcB/w4Iv4IeAuwCfgMsCYiTgbWJPMA7wZOTr4WATdUuTYzMytStVCQdAzwJ8CNABHxekTsAeYD
tySb3QJclEzPB74bOQ8C4yQdV636zMysp2qeKZwIvAh8R9JjklZIOhqYGhHbkm22A1OT6WnAcwX7b02WdSNpkaR1kta9+OKLVSzfzGz4qWYoNAJnADdExGzgFQ41FQEQuVt0D+s23YhYHhFzImLO5MmTK1asmZlVt/fRVmBrRDyUzN9BLhRekHRcRGxLmod2JOufB6YX7H98smxYKH6usoe1MLMsVC0UImK7pOckzYiIdmAe8GTydQlwTfL9B8kuq4GPSLoNeDvwckEz05BX/Fzl/oa1KAwRB4iZVUq171P4KHCrpJHA74C/ItdkdbukS4Fngfcn2/4IuADYDLyabDusFD5Xub9hLQpDpJLjInkgPrPhraqhEBG/AuaUWDWvxLYBXFnNeoaafIhUclwkD8RnNrz5juZhrtS1jDFTyz9jMbOhxaEwDPR1/eFwr2WY2dDmUBgG+rv+cDjXMsxsaPPjOIeJ/Bv/6InHZl2KmdUwh4KZmaUcCmZmlnIomJlZyheaB5FvDDOzWudQGES+MczMap1DYZAVPn7T3T/NrNb4moKZmaUcCmZmlnLzkZWteJwkgJkzZ9LU1JRRRWZWaQ4FK1vxOEn7tm/h+ivh9NNPz7YwM6sYh4IdlsJxksxs6CnrmoKks8tZZmZm9a3cC83/XOYyMzOrY302H0k6EzgLmCzpEwWrWoCGahZmZmaDr79rCiOBMcl2YwuW7wXeV62izMwsG32GQkT8DPiZpJsjwrffWp+Kx3Zyd1Wz+lNu76OjJC0H2gr3iYhzqlGU1afCsZ3cXdWsPpUbCv8HWAasADqrV47Vu8Kxncys/pQbCh0RcUNVKzEzs8yV2yX1h5L+WtJxkibkv8rZUVKDpMck3Z3MnyjpIUmbJf1vSSOT5Ucl85uT9W0D+5HMzGygyj1TuCT5/qmCZQH8pzL2XQxsIteNFeArwDci4jZJy4BLgRuS7y9FxEmSLk62+4sy67MMFI+F5IcGmdW/skIhIk4cyMElHQ/8GfBPwCckCTgH+Mtkk1uAvycXCvOTaYA7gH+RpKizd5nh1AOneCwkPzTIrP6VFQqSPlhqeUR8t59drwWWcOgeh4nAnojoSOa3AtOS6WnAc8lxOyS9nGy/s6iWRcAigNbW1nLKH1SFPXBe/v3TfPy8dmbMmAEMzU/ShWMh+aFBZvWv3OajtxZMjwLmAY8CvYaCpPcAOyJivaS5Ay2wWEQsB5YDzJkzpybfYfM9cPZuf5av3P2EP0mbWd0ot/noo4XzksYBt/Wz29nAhZIuIBckLcB1wDhJjcnZwvHA88n2zwPTga2SGoFjgF1l/hw1y5+kzayeDPTJa68AfV5niIjPRsTxEdEGXAzcHxEfAH7KoSEyLgF+kEyv5tAF7fcl29fkmYCZ2VBV7jWFH5LrbQS5gfBOBW4f4Gt+GrhN0j8CjwE3JstvBFZK2gzsJhckZmY2iMq9pvC1gukO4NmI2Frui0TEWmBtMv074G0ltjkA/Pdyj2lmZpVX7jWFn0mayqELzk9VryQbDoq77sLQ7r5rVi/KbT56P/BVcp/2BfyzpE9FxB1VrM2GkOIQaG9v59r72mk59gTAz3s2qxXlNh99HnhrROwAkDQZuI/cTWZm/Sq8fwMOdc/14HlmtaXcUBiRD4TELgbec8mGgVJDYIyZ6u65ZrWu3FD4saSfAKuS+b8AflSdkmwo8BAYZvWpv2c0nwRMjYhPSfpvwB8nq/4duLXaxVl98417ZvWnvzOFa4HPAkTEncCdAJJmJev+axVrMzOzQdbfdYGpEbGheGGyrK0qFZmZWWb6C4VxfaxrrmAdZmZWA/oLhXWS/mfxQkmXAeurU5KZmWWlv2sKHwPukvQBDoXAHGAk8OdVrMvMzDLQZyhExAvAWZL+FDgtWfx/I+L+qldmZmaDrtyxj35KbshrMzMbwsq9ec1s0HiwPLPsOBSs5hSPk+TB8swGj0PBalL+OddmNrgcClYTCgfQa29vx09i
NcuGQ+EIlXpOgN/QDl/hAHoePM8sOw6FI9TbcwLs8OUH0PPgeWbZcShUQGH7t9/QzKyeORSsrrn7qlllORSsrrn7qlllORSs7rn7qlnlVO05y5KmS/qppCclbZS0OFk+QdK9kp5Kvo9PlkvSNyVtlvSEpDOqVZuZmZVWzTOFDuCTEfGopLHAekn3Ah8C1kTENZI+A3wG+DTwbuDk5OvtwA3JdxvmCu9hyPN1A7PqqFooRMQ2YFsyvU/SJmAaMB+Ym2x2C7CWXCjMB74buU7+D0oaJ+m45Dg2jBXewwC+bmBWTYNyTUFSGzAbeIjcIz7zb/TbganJ9DTguYLdtibLuoWCpEXAIoDW1tbqFW01JX8Pg5lVV9WuKeRJGgN8H/hYROwtXJecFRzW7b8RsTwi5kTEnMmTJ1ewUjMzq2ooSGoiFwi3RsSdyeIXJB2XrD8O2JEsfx6YXrD78ckyMzMbJNXsfSTgRmBTRHy9YNVq4JJk+hLgBwXLP5j0QnoH8LKvJ5iZDa5qXlM4G1gIbJD0q2TZ54BrgNslXQo8C7w/Wfcj4AJgM/Aq8FdVrG3APACemQ1l1ex99P8A9bJ6XontA7iyWvVUigfAy15fw2y7+6rZkfEdzQPgAfCy1dcw2+6+anZkHApWl/oaZru37qsePM+sfw4FGzY8eJ5Z/xwKNqx48DyzvjkU+uHeRmY2nDgU+uHeRvWtr55KZtaTQ6EM7m1Uv/rqqWRmPTkUbMjrraeS72kw68mhYMOW72kw68mhYMOah+Q2667qQ2ebmVn9cCiYmVnKzUdmCV94NnMolFR4w5r7tg8fvvBs5lAoqfCGNfdtH17KHUzv4MGDAOlZRPG8zzCsXjkUepG/Yc03qxmUvrO94ejxTG47pcf8y79/mo+f186MGTPS/R0SVi8cCma9KB4iY8zU6d3ubG9smVJyfu/2Z/nK3U+4GcrqkkPBrBdHMkSG73+weuVQMOtDXw/zGaj+rk+4qcmy5FDAw2NbdRV3dW1vb+fa+9ppOfYEwNcjrH+D+dRAhwIeHtuqq7ira/73q5zrEQ4Jg8F9aqBDIeHhsa2aCq8x9Pf7VbxtXyFRrYAo/GRa3LxVzde13g3WUwOHZSi4ucjqSW8hcSRnEf01RxTfq1PY/bb4dR0aQ0tNhYKk84HrgAZgRURcU43XcXOR1bPCi999nUX09WZd/DdQvG9hF9xS3W8LX7ev0OjvJr/Cmqw21EwoSGoAvgWcC2wFHpG0OiKerMbrubnIhoK+mpr6erMudd9F8b59fVAqft3eQqOvm/yKa+ovQPqar9a2/e0L3UOt+AysHgOvZkIBeBuwOSJ+ByDpNmA+UJVQ2Ld9Szr96q7tNBx4jZdGN/eY72tdJff161T/dWqxpoq/ztHjS/6+A+x/aQdf+s7TjDt2A7uefpKW1lOR1Ou+r+x4buA/Tx919FVTQ/NYxh07HeCw5qu1bX/7vrr7Bb7wgXd1O8P6x1vvY/SEqT3WHYn29vZu71m56bcc8XFLUa20pUt6H3B+RFyWzC8E3h4RHynabhGwKJmdAXQf1hImATurXG4luM7Kcp2V5Torq9bqPCEiJpdaUUtnCmWJiOXA8t7WS1oXEXMGsaQBcZ2V5Tory3VWVr3UCbX1kJ3ngekF88cny8zMbJDUUig8Apws6URJI4GLgdUZ12RmNqzUTPNRRHRI+gjwE3JdUm+KiI397FZKr01LNcZ1VpbrrCzXWVn1UmftXGg2M7Ps1VLzkZmZZcyhYGZmqSETCpLOl9QuabOkz2RdTymSpkv6qaQnJW2UtDjrmvoiqUHSY5LuzrqW3kgaJ+kOSf8haZOkM7OuqRRJH0/+z38taZWkUVnXlCfpJkk7JP26YNkESfdKeir5Xt7daINb41eT//cnJN0laVyGJeZr6lFnwbpPSgpJk7KorVxDIhQKhsh4N/AmYIGkN2VbVUkdwCcj4k3AO4Ara7TOvMXApqyL6Md1wI8j4o/I3eJZc/VK
mgb8DTAnIk4j15Hi4myr6uZm4PyiZZ8B1kTEycCaZD5LN9OzxnuB0yLizcBvgM8OdlEl3EzPOpE0HTgP2FK8rtYMiVCgYIiMiHgdyA+RUVMiYltEPJpM7yP3BjYt26pKk3Q88GfAiqxr6Y2kY4A/AW4EiIjXI2JPpkX1rhFoltQIjAZ+n3E9qYj4ObC7aPF84JZk+hbgosGsqVipGiPinojoSGYfJHdvU6Z6+bcE+AawBKj5nj1DJRSmAc8VzG+lRt9s8yS1AbOBhzIupTfXkvsl7sq4jr6cCLwIfCdp5loh6eisiyoWEc8DXyP3KXEb8HJE3JNtVf2aGhHbkuntwNQsiynDh4F/y7qIUiTNB56PiMezrqUcQyUU6oqkMcD3gY9FxN6s6ykm6T3AjohYn3Ut/WgEzgBuiIjZwCtk38zRQ9IeP59ciL0BOFrS/8i2qvJFrt96zX7ClfR5ck2zt2ZdSzFJo4HPAV/KupZyDZVQqJshMiQ1kQuEWyPizqzr6cXZwIWSniHXFHeOpO9lW1JJW4GtEZE/27qDXEjUmncBT0fEixFxELgTOCvjmvrzgqTjAJLvOzKupyRJHwLeA3wgavOmqzeS+zDwePL3dDzwqKRjM62qD0MlFOpiiAzlxim+EdgUEV/Pup7eRMRnI+L4iGgj9295f0TU3CfbiNgOPCcpPzbxPKo01PoR2gK8Q9Lo5HdgHjV4QbzIauCSZPoS4AcZ1lJS8lCuJcCFEfFq1vWUEhEbImJKRLQlf09bgTOS392aNCRCIbnYlB8iYxNw+wCHyKi2s4GF5D55/yr5uiDrourcR4FbJT0BnA5cnW05PSVnMncAjwIbyP3d1cywB5JWAf8OzJC0VdKlwDXAuZKeInemU5WnIB5hjf8CjAXuTf6WlmVZI/RaZ13xMBdmZpYaEmcKZmZWGQ4FMzNLORTMzCzlUDAzs5RDwczMUg4FsxKS0Sy/VzDfKOnFwx0xVtJaSXOS6WdqfYRMM4eCWWmvAKdJak7mz6VG75I3qySHglnvfkRupFiABcCq/ApJRydj5z+cDMY3P1neLOm25NkOdwHNxQeV1Jas/9fkGQv35MNH0kmS7pP0uKRHJb2x+j+m2SEOBbPe3QZcnDwQ5810H9H28+SG/3gb8KfAV5MRWv8X8GpEnAr8HfCfezn2ycC3ImImsAd4b7L81mT5W8iNj7St9O5m1dGYdQFmtSoinkiGOF9A7qyh0HnkBg3822R+FNBK7vkO3yzY/4leDv90RPwqmV4PtEkaC0yLiLuS/Q9U6mcxK5dDwaxvq8k9C2EuMLFguYD3RkR74ca58e7K8lrBdCclmpnMsuDmI7O+3QT8Q0RsKFr+E+CjyainSJqdLP858JfJstPINTuVJXka31ZJFyX7H5WMx282aBwKZn2IiK0R8c0Sq64CmoAnJG1M5gFuAMZI2gR8mVzT0OFYCPxN0uz0S6Bmx923ocmjpJqZWcpnCmZmlnIomJlZyqFgZmYph4KZmaUcCmZmlnIomJlZyqFgZmap/w8NNnoNQ7xpSAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_boxplot_and_hist(X_train, \"MedInc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVIklEQVR4nO3de7BlZX3m8e9jt8hVQbqHwQY8PYFhTEgUpjWJWBaxg6PEiBkdRA0i4nTVjDoaR6NC4iWJKafGMupMCqvDRUBKRC4lJoyiXOI4I0g3VxWdabl2p4FuvIBg4gC/+WOvs9icPuf07j57n7VPn++n6tRZ+117r/07Su9nr3e977tSVUiSBPC0rguQJI0PQ0GS1DIUJEktQ0GS1DIUJEmtpV0XMBfLli2riYmJrsuQpAVl/fr1W6tq+XT7FnQoTExMsG7duq7LkKQFJcndM+2z+0iS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEmtBT0kVePhD177On7y4Nauy9gl7Lf/Mi675OKuy9AiZihozn7y4FYefuFbuy5jG/vccPZY1jWrG87uugItcnYfSZJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqbVoQ+GYY47pugRJi9w4fg4t2lCQJG3LUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVJradcFSNJidMoppwBzW/9oyZIlnHbaaaxevXpIVXmmIEnz7pRTTuHOO++c83Eef/xxPvaxj3HVVVcNoaoeQ0GS5tkwAmHSE088wfnnnz+04y247qMka4A1AIcccsicjjWOy9ZK/nepHXXPPfcM7VgLLhSqai2wFmDVqlU1l2Nde+21wyhp0fNDbLj873LXN+x/M3P9gtzP7iNJmmcrV64c2rGe9rSncdJJJw3veEM7kiRpIOecc85QgmHJkiWcfvrpQx19tOC6jyRpV3DOOedwzDHHjF13oWcKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTWog2FcVtvRNLiM46fQ4s2FCRJ2zIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEmtpV0XoIVvv/2XwQ1nd13GtPYZ07pmst/+y7ouQYucoaA5u+ySi7suQdKQ2H0kSWoZCpKklqEgSWoZCpKklqEgSWoZCpKkVqqq6xp2WpItwN1d19FnGbC16yJmMe71gTUOw7jXB9Y4DHOp77lVtXy6HQs6FMZNknVVtarrOmYy7vWBNQ7DuNcH1jgMo6rP7iNJUstQkCS1DIXhWtt1Adsx7vXBLlRjkkpy6KiLmcYu879hx8a9xpHU5zUFCUhyF/Ac4DlVtbWv/SbgBcDKqrprB49ZwGFVtSHJ54CNVfUnw6pZGgXPFKQn3Qm8YfJBkl8H9uyuHGn+GQrSk84H3tz3+GTgvMkHSZ6R5BNJ7klyf5LPJtmjb//7kmxO8g9J3jrTmySZaLqWTm6OtTXJ6X37lyQ5LcmPkjycZH2Sg4f8t0rTMhSkJ10HPDPJ85IsAU4EPt+3/+PAv6TXnXQosAL4EECSVwDvBY4FDgN+d4D3ewlwOLAa+FCS5zXt76F3xnIc8EzgrcCjc/nDpEEZCtJTTZ4tHAvcDmxq2gOsAf6oqn5cVQ8Df0kvOABOAM6pqu9W1SPARwZ4r49W1S+q6hbgFuD5TfvbgD+pqh9Wzy1V9eAw/jhpe7zJjvRU5wPfBFbS13UELKd3fWF9ksm2AEua7ecA6/ueP8hM+/v6th8F9m62DwZ+tENVS0PimYLUp6rupnfB+Tjg0r5dW4FfAL9WVfs2P8+qqskP8s30PswnHTKHMu4FfmUOr5d2mqEgbetU4GVNN9CkJ4C/Af4qyT8DSLIiyb9p9l8EvCXJ
rybZE/jwHN7/TODPkxyWnt9Isv8cjicNzFCQpqiqH1XVuml2vR/YAFyX5CHgG/QuFFNV/wP4FHB185yr51DCJ+mFzJXAQ8BZwB6zvkIaEievSZJanilIklqGgiSpZShIklqGgiSptaAnry1btqwmJia6LkOSFpT169dvnel2nAs6FCYmJli3brqRg5KkmSSZcca93UeSpJahIElqGQqSpJahIElqLegLzRoPv//a17N5y4+n3Xfg8mfzlUu+OBbHlLR9hoLmbPOWH3PoH/7ZtPs2fP5DY3NMSdtn95EkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJaLnOhRcP1lKTtG2koJNkXOBM4AijgrcAPgS8CE8BdwAlV9ZMkAT4NHAc8Crylqm4cZX1aXFxPSdq+UXcffRr4alX9K+D5wO3AB4Crquow4KrmMcArgcOanzXAGSOuTZI0xchCIcmzgJcCZwFU1S+r6qfA8cC5zdPOBV7TbB8PnFc91wH7JjlwVPVJkrY1yjOFlcAW4JwkNyU5M8lewAFVtbl5zn3AAc32CuDevtdvbNqeIsmaJOuSrNuyZcsIy5ekxWeUobAUOAo4o6qOBB7hya4iAKqq6F1rGFhVra2qVVW1avny5UMrVpI02lDYCGysquubxxfTC4n7J7uFmt8PNPs3AQf3vf6gpk2SNE9GFgpVdR9wb5LDm6bVwPeBy4GTm7aTgS8325cDb07PbwE/6+tmkiTNg1HPU3gncEGS3YA7gFPoBdFFSU4F7gZOaJ57Bb3hqBvoDUk9ZcS1qWOzzRu44867OHSe65E04lCoqpuBVdPsWj3Ncwt4+yjr0XiZbd7A//nwm+a5GkngMheSpD6GgiSp5dpHWnDu2LCBVS89dtp9o1jDyDWTtJgYCovQTB9yC+UD7rHKvK5h5JpJWkwMhUVopg+5Kz/yxqF/A5/tW/0oRhjN9/tJuxpDQa1RfAOf7ZijGGE03+83UwgtlLMuaSpDQZqDmULIbiUtVIaCBmK3jLQ4GAoayHx3y0jqhvMUJEktQ0GS1DIUJEktQ0GS1DIUJEktRx9JOORWmmQoSDjkVppk95EkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJaAw1JTXJ0Vf2v7bVpfMx2X2HH3UuayaDzFP4bcNQAbRoTs91X2HH3kmYyaygk+W3gxcDyJO/p2/VMYMkoC5Mkzb/tXVPYDdibXnjs0/fzEPC6Qd4gyZIkNyX52+bxyiTXJ9mQ5ItJdmvan9E83tDsn9jJv0mStJNmPVOoqr8H/j7J56rq7p18j3cBt9M7uwD4L8BfVdWFST4LnAqc0fz+SVUdmuTE5nmv38n3lCTthEGvKTwjyVpgov81VfWy2V6U5CDg94CPAe9JEuBlwBubp5wLfIReKBzfbANcDPz3JKmqGrBGaWzMtsDegcufzVcu+eI8VyQNZtBQ+BLwWeBM4PEdOP6ngD+m1+UEsD/w06p6rHm8EVjRbK8A7gWoqseS/Kx5/tb+AyZZA6wBOOSQQ3agFGn+zLbA3obPf2ieq5EGN2goPFZVZ+zIgZO8CnigqtYnOWZHC5tJVa0F1gKsWrXKswhJGqJBQ+ErSf4jcBnwT5ONVTX9QPieo4FXJzkO2J3eNYVPA/smWdqcLRwEbGqevwk4GNiYZCnwLODBHfljpIVutvkldjtpPgwaCic3v9/X11bAv5jpBVX1QeCDAM2Zwnur6k1JvkRv5NKFzXG/3Lzk8ubxt5v9V3s9QYvNbPNL7HbSfBgoFKpq5RDf8/3AhUn+ArgJOKtpPws4P8kG4MfAiUN8T0nSAAZd5uLN07VX1XmDvL6qrgWubbbvAF40zXP+Efh3gxxPWsi89afG2aDdRy/s294dWA3cCAwUChoN1zdamLz1p8bZoN1H7+x/nGRfetcE1CHXN5I0bDu7dPYjwDCvM0iSxsCg1xS+Qm+0EfQWwnsecNGoipIkdWPQ
awqf6Nt+DLi7qjaOoB5JUocG6j5qFsb7Ab3lKvYDfjnKoiRJ3RgoFJKcAHyH3pDRE4Drkwy0dLYkaeEYtPvodOCFVfUAQJLlwDforWYqSdpFDBoKT5sMhMaD7PzIJe0A5yJorlxPSTti0FD4apKvAV9oHr8euGI0JamfcxE0iO19eXj5n04/z9T1lDTV9u7RfChwQFW9L8m/BV7S7Po2cMGoi5M0GL88aFi2d6bwKZqVTqvqUuBSgCS/3uz7/RHWJqmPayZpPmwvFA6oqtumNlbVbUkmRlOSpOm4ZpLmw/YuFu87y749hliHJGkMbC8U1iX591Mbk7wNWD+akiRJXdle99G7gcuSvIknQ2AVsBvwByOsS5LUgVlDoaruB16c5HeAI5rmv6uqq0demSRp3g16P4VrgGtGXIskqWPOSpYktQwFSVLLUJAktQZd+0jSIuNCeouToSBpWrOtp+RCersuQ0FaxFxPSVMZCtIi5npKmmpkF5qTHJzkmiTfT/K9JO9q2p+d5OtJ/m/ze7+mPUk+k2RDkluTHDWq2iRJ0xvlmcJjwH+uqhuT7AOsT/J14C3AVVX18SQfAD4AvB94JXBY8/ObwBnN712ed1eTNC5GFgpVtRnY3Gw/nOR2YAVwPHBM87RzgWvphcLxwHlVVcB1SfZNcmBznF2aN0iRNC7mZZ5Cc++FI4Hr6d2jYfKD/j7ggGZ7BXBv38s2Nm2SpHky8lBIsjdwCfDuqnqof19zVlA7eLw1SdYlWbdly5YhVipJGunooyRPpxcIFzS38wS4f7JbKMmBwANN+ybg4L6XH9S0PUVVrQXWAqxatWqHAkXScMw2lNWJbQvbyEIhSYCzgNur6pN9uy4HTgY+3vz+cl/7O5JcSO8C888Ww/UEaSGabSirE9sWtlGeKRwNnATcluTmpu00emFwUZJTgbuBE5p9VwDHARuAR4FTRlibpA64dMb4G+Xoo28BmWH36mmeX8DbR1WPpO65dMb4c0azpLEw03UKzyDml6EgaSzMdJ3CM4j5ZSjME2ctS1oIDIV54qxlLRauvLqwGQqShsqVVxc2b8cpSWoZCpKklqEgSWp5TUHSguUM6eEzFCSNte2NZnr5n5437T7nN+wcQ0HSWHM00/wyFIbICWrS+HB5751jKAyRE9Sk8THbGcaVH3mjgTEDQ0HSouP9IGbmkFRJUstQkCS17D6SpBFbSPMpDIUd5AgjSTtqId1xzlDYQY4wkrQrMxQkaUALqRtoZxkKktRnvpfVmO39Nt1zNysOee60+0YVQoaCJPXZ2WU1dvaOc9t7v/m+FmEoSNIQ7CprNBkK03CEkaTFylCYhiOMJC1WzmiWJLXG6kwhySuATwNLgDOr6uOjei+7iCRpW2MTCkmWAH8NHAtsBG5IcnlVfX8U72cXkSRta5y6j14EbKiqO6rql8CFwPEd1yRJi0qqqusaAEjyOuAVVfW25vFJwG9W1TumPG8NsKZ5eDjww3ktdHbLgK1dFzGLca8PrHEYxr0+sMZhmEt9z62q5dPtGJvuo0FV1Vpgbdd1TCfJuqpa1XUdMxn3+sAah2Hc6wNrHIZR1TdO3UebgIP7Hh/UtEmS5sk4hcINwGFJVibZDTgRuLzjmiRpURmb7qOqeizJO4Cv0RuSenZVfa/jsnbUWHZr9Rn3+sAah2Hc6wNrHIaR1Dc2F5olSd0bp+4jSVLHDAVJUstQGIIkZyd5IMl3u65lOkkOTnJNku8n+V6Sd3Vd01RJdk/ynSS3NDV+tOuappNkSZKbkvxt17VMJ8ldSW5LcnOSdV3XM50k+ya5OMkPktye5Le7rmlSksOb/+0mfx5K8u6u65oqyR81/06+m+QLSXYf2rG9pjB3SV4K/Bw4r6qO6LqeqZIcCBxYVTcm2QdYD7xmVEuI7IwkAfaqqp8neTrwLeBdVXVdx6U9RZL3AKuAZ1bVq7quZ6okdwGrqmpsJ10lORf4n1V1ZjPScM+q+mnHZW2jWXpn
E71JtHd3Xc+kJCvo/fv41ar6RZKLgCuq6nPDOL5nCkNQVd8Epl9dbwxU1eaqurHZfhi4HVjRbVVPVT0/bx4+vfkZq28sSQ4Cfg84s+taFqokzwJeCpwFUFW/HMdAaKwGfjROgdBnKbBHkqXAnsA/DOvAhsIik2QCOBK4vuNSttF0zdwMPAB8varGrcZPAX8MPNFxHbMp4Mok65slYcbNSmALcE7TDXdmkr26LmoGJwJf6LqIqapqE/AJ4B5gM/CzqrpyWMc3FBaRJHsDlwDvrqqHuq5nqqp6vKpeQG82+4uSjE1XXJJXAQ9U1fqua9mOl1TVUcArgbc3XZvjZClwFHBGVR0JPAJ8oNuSttV0a70a+FLXtUyVZD96i4WuBJ4D7JXkD4d1fENhkWj66S8BLqiqS7uuZzZNd8I1wCs6LqXf0cCrmz77C4GXJfl8tyVtq/kWSVU9AFxGb/XhcbIR2Nh3FngxvZAYN68Ebqyq+7suZBq/C9xZVVuq6v8BlwIvHtbBDYVFoLmIexZwe1V9sut6ppNkeZJ9m+096N1X4wedFtWnqj5YVQdV1QS9boWrq2po386GIclezUACmi6ZlwNjNSKuqu4D7k1yeNO0GhibAQ993sAYdh017gF+K8mezb/t1fSuEw6FoTAESb4AfBs4PMnGJKd2XdMURwMn0ft2OznU7riui5riQOCaJLfSWwfr61U1lsM+x9gBwLeS3AJ8B/i7qvpqxzVN553ABc3/1y8A/rLbcp6qCdRj6X0DHzvNWdbFwI3AbfQ+x4e25IVDUiVJLc8UJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GaRpLqn5yWZGmSLTu6OmqSa5OsarbvSrJs2LVKw2QoSNN7BDiimUgHvXHrmzqsR5oXhoI0syvorYoKU2a4NrOHz27uAXFTkuOb9j2SXNjcJ+AyYI+pB00y0ez/m2ZN/CsnwyfJoUm+0dxX4sYkvzL6P1N6kqEgzexC4MTmBia/wVNXlj2d3lIXLwJ+B/ivzUzY/wA8WlXPAz4M/OsZjn0Y8NdV9WvAT4HXNu0XNO3Pp7eezebh/knS7JZ2XYA0rqrq1map8TfQO2vo93J6C+S9t3m8O3AIvXsFfKbv9bfOcPg7q+rmZns9MNGsW7Siqi5rXv+Pw/pbpEEZCtLsLqe3dv0xwP597QFeW1U/7H9yb32ygfxT3/bjTNPNJHXB7iNpdmcDH62q26a0fw14Z7NKJUmObNq/CbyxaTuCXrfTQJq74m1M8prm9c9Isufcypd2jKEgzaKqNlbVZ6bZ9ef0bhl6a5LvNY8BzgD2TnI78Gf0uoZ2xEnAf2q6nf438M93rnJp57hKqiSp5ZmCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKn1/wHrQep7DzzzWwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_boxplot_and_hist(train_t, \"MedInc\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
